
##################################################
##REPLICATION SCRIPT FOR##########################
##Andreas Dür and Markus Gastinger################
##Spinning a global web of EU external relations##
##JEPP 2022#######################################
##################################################


# prelims -----------------------------------------------------------------


rm(list=ls())
cat("\014")
dev.off()
options(max.print=100000, "scipen" = 100, warn = 1)
Sys.setenv(LANG = "en")
      
setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
path1 <- paste0(getwd(),"/")

      
# load packages -----------------------------------------------------------


library(pacman)
p_load(tidyverse, corrplot, sampleSelection, grid, gridExtra, stargazer, clipr, prediction)
sessionInfo()


# custom functions --------------------------------------------------------


stderr <- function(x) sqrt(var(x,na.rm=TRUE)/length(na.omit(x)))

'%!in%' <- function(x,y)!('%in%'(x,y))


# load data -----------------------------------------------------------


df1 <- read.csv("Duer_Gastinger_JEPP_replication_data.csv")


# calculate predictors ----------------------------------------------------


#recode  variables
df1$lsa_rec <- ((df1$lsa - min(df1$lsa, na.rm=T))/(max(df1$lsa, na.rm=T) - min(df1$lsa, na.rm=T)))*10
df1$gdppc_mean <- df1$gdppc_mean/1000
df1$fh_max1 <- (df1$fh_max * -1) + 8  ##turn around FH value so that more dem==higher value - max then is actually the min value

#logged versions of variables for summary statistics
df1$pop_sum_ln <- log(df1$pop_sum)
df1$distance_ln <- log(df1$distance)
df1$gdp_max_ln <- log(df1$gdp_max)
df1$gdppc_mean_ln <- log(df1$gdppc_mean)

#accession countries
df1$eu_accession <- 
  ifelse(df1$country1=="Albania" & df1$year>2008 |
           df1$country1=="Bosnia & Herzegovina" & df1$year>2015 |
           df1$country1=="Bulgaria" & df1$year>1995 |           
           df1$country1=="Croatia" & df1$year>2002 |
           df1$country1=="Denmark" & df1$year>1973 |
           df1$country1=="Iceland" & df1$year>2009 |
           df1$country1=="Montenegro" & df1$year>2008 |
           df1$country1=="Norway" & df1$year>1992 |
           df1$country1=="Turkey" & df1$year>1987 |
           df1$country1=="Serbia" & df1$year>2009 |
           df1$country1=="Switzerland" & df1$year>1992 |
           df1$country1=="Liechtenstein" & df1$year>1992 |
           df1$country1=="Former Yugoslav Republic of Macedonia" & df1$year>2003 |
           df1$country1=="Romania" & df1$year>1994, 1, 0)

##policy areas
df1$economic <- ifelse(df1$pa %in% c("CCP", "Economic and monetary affairs", "Maritime Affairs and Fisheries",
                                      "Taxation", "Transport"), 1, 0)
df1$foreign <- ifelse(df1$pa %in% c("FSP", "Development"), 1, 0)

#general_agreement
df1$general_agreement <- ifelse(df1$pa.two.or.more == 1, 1, 0)
df1$general_agreement <- ifelse(df1$association.agreement==1, 1, df1$general_agreement)


# descriptive tables and charts for appendix -----------------------------------------


#Table A1
names_items <- c("JB.two.or.more.types", "JB.level", "JB.RoP", "JB.decisions",
                 "JB.amend","JB.subbodies", "JB.frequency", "JB.mentions")
df1[,names_items] <- apply(df1[,names_items], 2, function(x) ifelse(df1$JB==0, NA, x))
apply(df1[,names_items], 2, function(x) table(x))

#Figure A1
tmp <- as.data.frame(table(df1[df1$JB == 1,"jb_strength"]))
tmp$perc <- round((tmp$Freq/(sum(tmp$Freq)))*100, digits = 1)
ggplot(data=tmp, aes(x=Var1, y=perc)) +
  geom_bar(stat="identity") +
  labs(title = "", x = "JB strength", y = "Percent") +
  theme_bw()

#Figure A2: correlation matrix indicators
df_x <- df1[,names_items]
names(df_x) <- c("Two or more", "Levels", "Rules of procedure", 
                 "Decisions", "Amend",   "Subbodies", "Frequency","Mentions")
cor_df <- cor(df_x, use="pairwise.complete.obs")
corrplot(cor_df, method = 'number', tl.col = "black")


#Figure A3: year signed
df_y <- as.data.frame(table(df1$year))
df_y$Var1 <- as.character(df_y$Var1)
df_y$jbs <- tapply(df1$JB, df1$year, sum, na.rm=T)
df_y1 <- data.frame(year=c(df_y$Var1, df_y$Var1), freq=c(df_y$Freq, df_y$jbs), 
                    label=c(rep("Overall", 27), rep("JBs", 27)))
df_y1$label <- factor(df_y1$label, levels = c("Overall", "JBs"), ordered=TRUE)
ggplot(df_y1, aes(x=year, y=freq, fill=label)) +
  geom_col(position = position_dodge(preserve = 'single')) +
  labs(y="Number of agreements/JBs", x="Year signed") +
  scale_fill_manual(values=c("#003366","#FF9999")) +
  guides(fill=guide_legend(title="")) +
  theme_bw()

#Figure A4: policy field
df_p <- as.data.frame(table(df1$pa))
df_p$Var1 <- as.character(df_p$Var1)
df_p$jbs <- tapply(df1$JB, df1$pa, sum, na.rm=T)
df_p1 <- data.frame(policy=c(df_p$Var1, df_p$Var1), freq=c(df_p$Freq, df_p$jbs), 
                    freq1=c(df_p$Freq, df_p$Freq), label=c(rep("Overall", 18), rep("JBs", 18)))
df_p1$label <- factor(df_p1$label, levels = c("Overall", "JBs"), ordered=TRUE)
ggplot(df_p1, aes(x=reorder(policy, -freq1), y=freq, fill=label)) +
  geom_col(position = position_dodge(preserve = 'single')) +
  labs(y="Number of agreements/JBs", x="") +
  scale_fill_manual(values=c("#003366","#FF9999")) +
  guides(fill=guide_legend(title="")) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1,size = 8))

#Figure A5: correlation plot for appendix
df_x <- df1[,c("pop_sum_ln", "distance_ln", "general_agreement",  
               "plurilateral", "gdppc_mean_ln", "fh_max1", "economic", "foreign", "indefinite")]
names(df_x) <- c("Population (sum, log)", "Distance (log)", "Broad agreement",
                 "Plurilateral", "GDPpc (mean, log)",
                 "Democracy (min)", "Economic policy",  "Foreign policy", "Indefinite")
cor_df = cor(df_x, use="pairwise.complete.obs")
corrplot(cor_df, method = 'number', tl.col = "black")

#Table A2: summary statistics
stargazer(df1[,c("JB", "jb_strength", "lsa_rec", "pop_sum_ln", "distance_ln", "general_agreement",  
                 "plurilateral", "gdppc_mean_ln", "fh_max1", "economic", "foreign", 
                 "indefinite")], 
          median=TRUE, digits=2, type="html",
          covariate.labels= c("JB","JB strength", "JB strength (LSA)",
                              "Population (sum, log)", "Distance (log)", "Broad scope",
                              "Plurilateral", "GDPpc (mean, log)",
                              "Democracy (min)", "Economic policy",  "Foreign policy", "Indefinite"))
write_last_clip()


# base model ----------------------------------------------------


selectEq <- JB ~ 
  pop_sum_ln + 
  log(distance) +
  general_agreement +
  plurilateral +  
  log(gdppc_mean) + 
  fh_max1 +   
  economic + foreign + 
  as.factor(year) +
  indefinite +  ##instrument
  NULL
outcomeEq <- jb_strength ~  
  pop_sum_ln + 
  log(distance) +
  general_agreement +
  plurilateral +  
  log(gdppc_mean) + 
  fh_max1 +   
  economic + foreign + 
  as.factor(year) +
  NULL

ms <- heckit(selectEq, outcomeEq, data = df1, method = "2step")
summary(ms)


# robustness checks -------------------------------------------------------


#alt dep var
summary(mr1 <- heckit(selectEq, update.formula(outcomeEq, lsa_rec ~ . ), data = df1, method = "2step"))

#gdp instead of pop
summary(mr2 <- heckit(update.formula(selectEq,  ~ . - pop_sum_ln + gdp_max_ln), update.formula(outcomeEq,  ~ . - pop_sum_ln + gdp_max_ln), data = df1, method = "2step"))

#control for eu accession candidates
summary(mr3 <- heckit(update.formula(selectEq,  ~ . + eu_accession), update.formula(outcomeEq,  ~ . + eu_accession), data = df1, method = "2step"))

#use pages instead of general_agreement
summary(mr4 <- heckit(update.formula(selectEq,  ~ . - general_agreement + pages), update.formula(outcomeEq,  ~ . - general_agreement + pages), data = df1, method = "2step"))

#control for earlier JBs
summary(mr5 <- heckit(update.formula(selectEq,  ~ . + jb_lag), update.formula(outcomeEq,  ~ . + jb_lag), data = df1, method = "2step"))

#control for continent
summary(mr6 <- heckit(update.formula(selectEq,  ~ . + continent), update.formula(outcomeEq,  ~ . + continent), data = df1, method = "2step"))

#control for depth
summary(mr7 <- heckit(update.formula(selectEq,  ~ . + framework.agreement + cooperation.agreement + association.agreement), update.formula(outcomeEq,  ~ . + framework.agreement + cooperation.agreement + association.agreement), data = df1, method = "2step"))

#each item in the DV index separately
summary(mri1 <- heckit(update.formula(selectEq,  ~ . ), update.formula(outcomeEq,  JB.two.or.more.types ~ . ), data = df1, method = "2step"))
summary(mri2 <- heckit(update.formula(selectEq,  ~ . ), update.formula(outcomeEq,  JB.RoP ~ . ), data = df1, method = "2step"))
summary(mri3 <- heckit(update.formula(selectEq,  ~ . ), update.formula(outcomeEq,  JB.level ~ . ), data = df1, method = "2step"))
summary(mri4 <- heckit(update.formula(selectEq,  ~ . ), update.formula(outcomeEq,  JB.decisions ~ . ), data = df1, method = "2step"))
summary(mri5 <- heckit(update.formula(selectEq,  ~ . ), update.formula(outcomeEq,  JB.frequency ~ . ), data = df1, method = "2step"))
summary(mri6 <- heckit(update.formula(selectEq,  ~ . ), update.formula(outcomeEq,  JB.subbodies ~ . ), data = df1, method = "2step"))
summary(mri7 <- heckit(update.formula(selectEq,  ~ . ), update.formula(outcomeEq,  JB.amend ~ . ), data = df1, method = "2step"))
summary(mri8 <- heckit(update.formula(selectEq,  ~ . ), update.formula(outcomeEq,  JB.mentions ~ . ), data = df1, method = "2step"))


# Export models --------------------------------------------------


#Table 1: baseline
ms1 <- ms #this is a work-around to export both parts of model via stargazer
ms1$param$index$betaO <- ms$param$index$betaS
ms1$param$index$betaS <- ms$param$index$betaO

stargazer(ms1, ms, mr1,         
          column.labels = c("<em>Model 1 (Selection eq.)</em>", "<em>Model 1 (Outcome eq.)</em>", "<em>Model 2 (Outcome eq.)</em>"), 
          digits=3, type="html", 
          covariate.labels = c("Population (sum, log)", "Distance (log)", "Broad scope",   
                               "Plurilateral", "GDPpc (mean, log)", "Democracy (min)", 
                               "Economic policy", "Foreign policy", 1993:2018, 
                               "Indefinite", "Constant"), 
          dep.var.labels.include=FALSE,
          dep.var.caption = "")
write_last_clip()

#Table A3: robustness checks
stargazer(mr2, mr3, mr4, mr5, mr6, mr7,
          column.labels = c("<em>Model 3</em>", "<em>Model 4</em>", "<em>Model 5</em>", "<em>Model 6</em>", "<em>Model 7</em>", "<em>Model 8</em>"), 
          digits=3, type="html", 
          covariate.labels = c("Population (sum, log)", "Distance (log)", "Broad scope",   
                               "Plurilateral", "GDPpc (mean, log)", "Democracy (min)", "Economic policy",  
                               "Foreign policy", 1993:2018, "GDP (max, log)", "EU accession",
                               "Pages", "Prior JB", "Americas", "Asia", "Europe", "Intercontinental", "Oceania",
                               "Framework agreement", "Cooperation agreement", "Association agreement", 
                               "Constant"),
          dep.var.labels.include=FALSE,
          dep.var.caption = "")
write_last_clip()

#Table A4: individual items as DV
stargazer(mri1, mri2, mri3, mri4, mri5, mri6, mri7, mri8,        
          column.labels = c("<em>Two levels</em>", "<em>Rules of procedure</em>", "<em>Ministers</em>",
                            "<em>Decisions</em>", "<em>Frequency</em>", "<em>Subbodies</em>", "<em>Amend</em>", 
                            "<em>Mentions</em>"), 
          digits=3, type="html", 
          covariate.labels = c("Population (sum, log)", "Distance (log)", "Broad scope",   
                               "Plurilateral", "GDPpc (mean, log)", "Democracy (min)", 
                               "Economic policy", "Foreign policy", 1993:2018, 
                               "Constant"), 
          dep.var.labels.include=FALSE,
          dep.var.caption = "")
write_last_clip()


# substantive effects -------------------------------------------------------


df_nona <- df1[!is.na(df1$fh_max),]

#base model
summary(mp <- probit(selectEq, data=df_nona))
df_nona$IMR <- invMillsRatio(mp)$IMR1
summary(mo <- lm(update(outcomeEq, ~ . + IMR), data = df_nona[df_nona$JB == 1, ]))

summary(prediction(mo, at=list(pop_sum_ln=quantile(df_nona$pop_sum_ln, na.rm=TRUE)))) #Vatican is not in analysis, as no democracy value
summary(prediction(mo, at=list(distance=quantile(df_nona$distance, na.rm=TRUE))))
summary(prediction(mo, at=list(general_agreement=c(0,1))))
